#############
#############
#############
## Stefan Müller and Sven-Oliver Proksch: 
## Nostalgia in European Party Politics:
## A Text-Based Measurement Approach
## British Journal of Political Science
##
## Script returns all tables and plots 
## reported in SI Section A
## (Figure A1, Figure A2, Table A1)
#############
#############
#############

# set working directory, either by using the here() package, 
# setwd() or by creating an RProj file

# load packages
library(dplyr)    # CRAN v1.1.2 
library(ggplot2)  # CRAN v3.4.2 
library(xtable)   # CRAN v1.8-4
library(Hmisc)    # CRAN v5.1-0
library(stringr)  # CRAN v1.5.0
library(ggridges) # CRAN v0.5.4
library(tidyr)    # CRAN v1.3.0

# If the code does not run, one or more packages may have been 
# updated, which may result in errors or conflicts. You can solve this issue
# by installing the package version listed above or by using the 
# groundhog package:
# after installing groundhog using install.packages("groundhog")
# change library(name_of_package) to
# groundhog::groundhog.library(name_of_package, date = "2023-09-04")
# Instead of adjusting the library() function for each package, 
# you can adjust them all at once using
# the following syntax:
# groundhog.library("
#                   library('pkgA')
#                   library('pkgB')
#                   library('pkgC')", date = "2023-09-04")
# More details are available at: https://groundhogr.com/using/

# load the helper script for the custom ggplot2 scheme
source("function_theme_base.R")

# manifesto-level dataset (one row per manifesto)
dat_manifestolevel <- readRDS("data_nostalgia_manifestolevel.rds")


# availability of manifestos: number of distinct manifesto rows
# per country and election date
cmp_en_sum <- dat_manifestolevel |>
    dplyr::distinct(countryname, edate, manifesto_id, real_gdp_growth) |>
    group_by(countryname, edate) |>
    count()

# number of distinct parties and countries in the dataset
n_distinct(dat_manifestolevel$party)
n_distinct(dat_manifestolevel$countryname)

# manifestos per country: mean, minimum and maximum across countries.
# Collapse to one row per country first; summarising the manifesto-level
# rows directly would weight every country by its number of manifestos
# and inflate the mean.
dat_manifestolevel |>
    count(countryname, name = "n_manifestos") |>
    summarise(mean = mean(n_manifestos),
              min = min(n_manifestos),
              max = max(n_manifestos))


# number of manifestos per country, sorted in descending order
dat_sum_country <- dat_manifestolevel |>
    count(countryname, name = "n_manifestos") |>
    arrange(desc(n_manifestos))


# average number of manifestos and of elections per country
dat_manifestolevel |>
    group_by(countryname) |>
    summarise(n_manifestos = n(),
              n_elections = n_distinct(election_id)) |>
    summarise(manifestos = mean(n_manifestos),
              elections = mean(n_elections))


# first and last rows of the sorted country table
head(dat_sum_country)
tail(dat_sum_country)

# elections per country: mean, minimum and maximum across countries.
# Reduce to one row per country before summarising; averaging over the
# election-level rows would weight every country by its number of elections.
dat_manifestolevel |>
    distinct(election_id, countryname) |>
    count(countryname, name = "n_elections") |>
    summarise(mean = mean(n_elections),
              min = min(n_elections),
              max = max(n_elections))


# input data for Figure A1 ----
# For each country, add the total number of manifestos (n_country) and the
# number of distinct election dates (n_country_election), and combine both
# into the label "<country> (<manifestos>; <elections>)".
cmp_en_sum_valid <- cmp_en_sum |>
    ungroup() |>
    filter(!is.na(countryname)) |>
    group_by(countryname) |>
    mutate(n_country = sum(n),
           n_country_election = n_distinct(edate),
           countryname_n_election = paste0(countryname,
                                           " (", n_country, "; ",
                                           n_country_election, ")")) |>
    # drop the grouping so that later steps do not run per country by accident
    ungroup() |>
    # convert edate to date format
    mutate(edate = as.Date(edate))


# Figure A01 ----
# One row per country (labelled with manifesto and election counts), one
# point per election date. The plot is stored in an object and passed to
# ggsave() explicitly so that the saved file does not depend on whichever
# plot happened to be created last.
fig_a01 <- ggplot(cmp_en_sum_valid,
                  aes(x = edate,
                      y = forcats::fct_rev(factor(countryname_n_election)))) +
    geom_line(alpha = 0.6) +
    geom_point(alpha = 1, size = 3.5) +
    scale_x_date(breaks = "4 years",
                 date_labels = "%Y") +
    labs(x = "Election year", y = NULL) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
fig_a01
ggsave("fig_a01.pdf", plot = fig_a01,
       width = 9, height = 7)

# decade label for the plot: the decade value with an "s" appended
dat_manifestolevel$decade2 <- paste0(dat_manifestolevel$decade, "s")

# Figure A02 ----
# Density ridges of manifesto length by decade; quantiles = 2 draws a line
# at the median of each ridge. The plot object is passed to ggsave()
# explicitly instead of relying on the implicit "last plot".
fig_a02 <- ggplot(dat_manifestolevel, aes(x = n_sentences_manifesto,
                                          y = forcats::fct_rev(decade2))) +
    stat_density_ridges(quantile_lines = TRUE,
                        quantiles = 2,
                        fill = "grey90") +
    scale_x_log10() +
    labs(x = "Number of Manifesto Sentences (Log Scale)",
         y = "Decade")
fig_a02
ggsave("fig_a02.pdf", plot = fig_a02,
       width = 9, height = 5)

# prepare data for Table A01
# Average the BERT-based nostalgia measure per party (only party groups with
# at least two rows, i.e. manifestos), then keep for every country the party
# with the highest and the party with the lowest average.
dat_minmax_bert <- dat_manifestolevel |>
    group_by(region, countryname, party, party_family_recoded,
             partyname, partyabbrev, populism_popu_list_categories) |>
    filter(n() >= 2) |>
    summarise(mean_party = mean(nostalgia_sentences_per_1000_bert)) |>
    # regroup by country only; the result stays grouped by countryname
    group_by(countryname) |>
    mutate(mean_nostalgia = mean(mean_party)) |>
    arrange(countryname, desc(mean_party)) |>
    # first row = highest, last row = lowest party mean within the country
    filter(row_number() %in% c(1, n()))


# clean up party abbreviations and shorten party family labels
dat_minmax_bert_clean <- dat_minmax_bert |>
    # replace the abbreviation of individual parties, identified by party code
    mutate(
        partyabbrev = ifelse(countryname == "Norway" & party == "12221",
                             "SF", partyabbrev),
        partyabbrev = ifelse(countryname == "France" & party == "31621",
                             "UMP", partyabbrev),
        partyabbrev = ifelse(countryname == "Ireland" & party == "53620",
                             "FF", partyabbrev),
        partyabbrev = ifelse(countryname == "Latvia" & party == "87071",
                             "LNNK", partyabbrev),
        partyabbrev = ifelse(party == "51420", "Lib P.", partyabbrev)
    ) |>
    # recoded copy of the abbreviation without the non-ASCII characters
    mutate(partyabbr_recoded = dplyr::recode(partyabbrev,
                                             "KSČM" = "KSCM")) |>
    # abbreviate the party family names and wrap them in parentheses
    mutate(
        party_family_recoded = str_replace_all(party_family_recoded,
                                               c("Democratic" = "Dem.",
                                                 "Christian" = "Chr.",
                                                 "Nationalist" = "Nat.",
                                                 "Conservative" = "Cons.")),
        party_family_recoded = dplyr::recode(party_family_recoded,
                                             "Other" = "Oth."),
        party_family_recoded = paste0("(", party_family_recoded, ")")
    ) |>
    # further manual abbreviations; GERB is padded with leading spaces
    mutate(
        partyabbr_recoded = ifelse(partyabbr_recoded == "GERB", "    GERB",
                                   partyabbr_recoded),
        partyabbr_recoded = ifelse(party == "41111", "GP", partyabbr_recoded),
        partyabbr_recoded = ifelse(party == "15450", "LibRefP",
                                   partyabbr_recoded),
        partyabbr_recoded = ifelse(party == "22711", "CD", partyabbr_recoded)
    ) |>
    # NOTE: from here on partyabbr_recoded holds "    GERB", never the
    # unpadded "GERB", so a family override testing for "GERB" cannot match;
    # only the DPS label (padded with trailing spaces) is overridden.
    mutate(party_family_recoded = ifelse(partyabbr_recoded == "DPS",
                                         "(Oth.)        ",
                                         party_family_recoded))


# set the order of the region factor levels explicitly
dat_minmax_bert_clean <- dat_minmax_bert_clean |>
    mutate(region = factor(region,
                           levels = c("Central and Eastern Europe",
                                      "Southern Europe",
                                      "Western Europe",
                                      "Northern Europe")))

# table linking party abbreviations to full party names; whitespace added
# to the abbreviations is squished and "Populist" is shortened to "Pop."
dat_partyabbrev <- dat_minmax_bert_clean |>
    mutate(partyabbr_recoded = str_squish(partyabbr_recoded),
           Populist = str_replace_all(populism_popu_list_categories,
                                      "Populist", "Pop.")) |>
    select(Country = countryname,
           `Party Abbr.` = partyabbr_recoded,
           Party = partyname,
           Populist) |>
    arrange(Country)


# Table A1 ----
# Write the party abbreviation table to tab_a01.tex as a LaTeX table.
# `digits` belongs to xtable(); print.xtable() silently ignores it, so it is
# set only once, when the table object is created.
print(xtable(dat_partyabbrev,
             digits = 1,
             caption = "List of parties listed in Figure \\ref{fig:nostalgia_partylevel}.",
             label = "tab:party_abbr",
             # one entry per column plus a leading entry for the row names
             align = c("p{0.025\\textwidth}",
                       "p{0.18\\textwidth}",
                       "p{0.14\\textwidth}",
                       "p{0.5\\textwidth}",
                       "p{0.15\\textwidth}")),
      type = "latex",
      size = "footnotesize",
      file = "tab_a01.tex",
      include.rownames = FALSE,
      caption.placement = "top")
